# -*- coding: utf-8 -*-
import urllib.request
from urllib import request,parse
from bs4 import BeautifulSoup

#取到某个贴吧第几到第几页的帖子
#先把链接弄出来
# Crawl thread titles from pages beginpage..endpage of a given tieba (forum).
def tiebaSpider(beginpage, endpage, k):
    """Fetch and return the thread titles of a Baidu Tieba forum.

    Args:
        beginpage: first page index to fetch (inclusive).
        endpage: last page index to fetch (inclusive).
        k: forum keyword (the "kw" query parameter), e.g. a school name.

    Returns:
        A list of thread-title strings collected from every page.
    """
    titles = []
    base_url = "https://tieba.baidu.com/f"
    for page in range(beginpage, endpage + 1):
        # Tieba paginates 50 threads per page via the "pn" offset parameter.
        pn = page * 50
        # urlencode builds and percent-escapes the whole query string safely,
        # replacing the manual parse.quote + string concatenation.
        query = parse.urlencode({"kw": k, "ie": "utf-8", "pn": pn})
        pullurl = base_url + "?" + query
        print(pullurl)
        page_titles = parseHtml(pullurl)
        for title in page_titles:
            print(title)
        titles.extend(page_titles)
    return titles

#解析一个贴吧页面
# Parse a single tieba listing page.
def parseHtml(url):
    """Download one tieba listing page and extract its thread titles.

    Args:
        url: full listing-page URL (as built by tiebaSpider).

    Returns:
        A list of thread-title strings; empty if the thread list is absent.
    """
    headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
    req = request.Request(url, headers=headers)  # 'req', not 're': don't shadow the regex module name
    # Context manager guarantees the HTTP response is closed (original leaked it).
    with request.urlopen(req) as response:
        html = response.read().decode("utf-8")
    # Explicit parser avoids the "no parser specified" warning and makes the
    # result independent of which optional parsers happen to be installed.
    soup = BeautifulSoup(html, "html.parser")
    # Search the already-built tree directly instead of re-serializing to a
    # string and re-parsing it (the original did BeautifulSoup(str(...)) twice).
    thread_list = soup.find(attrs={"id": "thread_list"})
    if thread_list is None:
        # Original produced [] in this case too (find_all on a "None" soup).
        return []
    titles = []
    for item in thread_list.find_all(attrs={"class": " j_thread_list clearfix"}):
        link = item.find("a")
        if link is not None:  # guard against a thread item without an <a> tag
            titles.append(link.get_text())
    return titles

tiebaSpider(0,7,"华中师范大学");